

* Session setup: start from an empty workspace, close any log left open by a
* previous run, and open a fresh text log in the project directory.
clear all
capture log close

set more off
* NOTE(review): recent Stata releases are understood to ignore the memory and
* matsize settings below; confirm against the Stata version actually in use.
set memory 4g
set matsize 800

cd ~/Crime
log using Sexratio_UHS.log, text replace


********************************************************************************************

* Sex ratio measures used for Table 6 in the paper (years 1988-1995): Census 1990 residential sex ratio

********************************************************************************************

	* Build the working extract census_90.dta from the raw 1990 census file:
	* six provinces only, a 0/1 sex indicator and a rural flag taken from hukou.
	use census90.dta, clear

	* Province code = region1 with its last four digits removed.
	generate prov = int(region1 / 10000)

	tabulate prov

	keep if inlist(prov, 11, 21, 33, 44, 51, 61)

	keep prov birthyr sex hukou

	* NOTE(review): birthyr appears to be stored without its leading 1
	* (e.g. 965 for 1965) - confirm against the census codebook.
	replace birthyr = birthyr + 1000

	* Recode sex so that male = 1 and female = 0.
	replace sex = 0 if sex == 2

	* rural is 1 when hukou equals 1 and 0 otherwise.
	generate rural = (hukou == 1)
	drop hukou

	drop if birthyr < 1900

	compress

	save census_90.dta, replace

	clear


	* census 90, urban sample, female
	*
	* For every region and birth year t in the urban (rural==0) sample:
	*   x = women born in t-2 .. t+2
	*   y = men   born in t-4 .. t
	* and the saved measures are y/x, x+y and ln(x+y).

	use census_90.dta, clear

	tabulate sex
	tabulate rural

	keep if rural == 0

	rename prov region
	rename sex male

	* Head count per region x birth year x sex cell.
	generate byte one = 1
	collapse (count) n = one, by(region birthyr male)

	* One row per region x birth year: n0 holds women, n1 holds men.
	reshape wide n, i(region birthyr) j(male)
	rename n1 n_male
	rename n0 n_female

	* Panel on birth year: L# is the cohort born # years earlier, F# later.
	tsset region birthyr

	generate x = n_female + L1.n_female + L2.n_female + F1.n_female + F2.n_female
	generate y = n_male + L1.n_male + L2.n_male + L3.n_male + L4.n_male

	generate ratio_fe_90_urb_5 = y / x
	generate fe_90_urb_5 = x + y
	generate ln_fe_90_urb_5 = ln(x + y)

	keep region birthyr ratio_fe_90_urb_5 fe_90_urb_5 ln_fe_90_urb_5

	save ratio_fe_90_urb_5, replace

	clear

	
	* census 90, rural sample, female
	*
	* For every region and birth year t in the rural (rural==1) sample:
	*   x = women born in t-2 .. t+2
	*   y = men   born in t-4 .. t
	* and the saved measures are y/x, x+y and ln(x+y).

	use census_90.dta, clear

	tabulate sex
	tabulate rural

	keep if rural == 1

	rename prov region
	rename sex male

	* Head count per region x birth year x sex cell.
	generate byte one = 1
	collapse (count) n = one, by(region birthyr male)

	* One row per region x birth year: n0 holds women, n1 holds men.
	reshape wide n, i(region birthyr) j(male)
	rename n1 n_male
	rename n0 n_female

	* Panel on birth year: L# is the cohort born # years earlier, F# later.
	tsset region birthyr

	generate x = n_female + L1.n_female + L2.n_female + F1.n_female + F2.n_female
	generate y = n_male + L1.n_male + L2.n_male + L3.n_male + L4.n_male

	generate ratio_fe_90_rur_5 = y / x
	generate fe_90_rur_5 = x + y
	generate ln_fe_90_rur_5 = ln(x + y)

	keep region birthyr ratio_fe_90_rur_5 fe_90_rur_5 ln_fe_90_rur_5

	save ratio_fe_90_rur_5, replace

	clear



	* census 90, urban sample, male
	*
	* For every region and birth year t in the urban (rural==0) sample:
	*   x = men   born in t-2 .. t+2
	*   y = women born in t   .. t+4
	* and the saved measures are x/y, x+y and ln(x+y).

	use census_90.dta, clear

	tabulate sex
	tabulate rural

	keep if rural == 0

	rename prov region
	rename sex male

	* Head count per region x birth year x sex cell.
	generate byte one = 1
	collapse (count) n = one, by(region birthyr male)

	* One row per region x birth year: n0 holds women, n1 holds men.
	reshape wide n, i(region birthyr) j(male)
	rename n1 n_male
	rename n0 n_female

	* age is only a reversed time index here: a lag in age is a LATER birth
	* year, a lead in age is an EARLIER birth year.
	generate age = 2000 - birthyr
	tsset region age

	generate x = n_male + F1.n_male + F2.n_male + L1.n_male + L2.n_male
	generate y = n_female + L1.n_female + L2.n_female + L3.n_female + L4.n_female

	generate ratio_ma_90_urb_5 = x / y
	generate ma_90_urb_5 = x + y
	generate ln_ma_90_urb_5 = ln(x + y)

	keep region birthyr ratio_ma_90_urb_5 ma_90_urb_5 ln_ma_90_urb_5

	save ratio_ma_90_urb_5, replace

	clear


	* census 90, rural sample, male
	*
	* For every region and birth year t in the rural (rural==1) sample:
	*   x = men   born in t-2 .. t+2
	*   y = women born in t   .. t+4
	* and the saved measures are x/y, x+y and ln(x+y).

	use census_90.dta, clear

	tabulate sex
	tabulate rural

	keep if rural == 1

	rename prov region
	rename sex male

	* Head count per region x birth year x sex cell.
	generate byte one = 1
	collapse (count) n = one, by(region birthyr male)

	* One row per region x birth year: n0 holds women, n1 holds men.
	reshape wide n, i(region birthyr) j(male)
	rename n1 n_male
	rename n0 n_female

	* age is only a reversed time index here: a lag in age is a LATER birth
	* year, a lead in age is an EARLIER birth year.
	generate age = 2000 - birthyr
	tsset region age

	generate x = n_male + F1.n_male + F2.n_male + L1.n_male + L2.n_male
	generate y = n_female + L1.n_female + L2.n_female + L3.n_female + L4.n_female

	generate ratio_ma_90_rur_5 = x / y
	generate ma_90_rur_5 = x + y
	generate ln_ma_90_rur_5 = ln(x + y)

	keep region birthyr ratio_ma_90_rur_5 ma_90_rur_5 ln_ma_90_rur_5

	save ratio_ma_90_rur_5, replace

	clear
	
	
	* combine the data
	*
	* Reduce each of the four 1990 ratio files to (birthyr, region, ratio),
	* tag it with rural and male indicators, and write it out as
	*   temp1 = female rural, temp2 = female urban,
	*   temp3 = male rural,   temp4 = male urban.

	local k = 0
	foreach sx in fe ma {
		foreach area in rur urb {
			local ++k

			use ratio_`sx'_90_`area'_5.dta, clear

			keep birthyr region ratio_`sx'_90_`area'_5
			rename ratio_`sx'_90_`area'_5 ratio
			generate rural = ("`area'" == "rur")
			generate male = ("`sx'" == "ma")

			save temp`k'.dta, replace
			clear
		}
	}

	* Stack the four pieces in the order temp1, temp2, temp3, temp4.
	use temp1.dta, clear
	append using temp2 temp3 temp4

	rename birthyr yob
	rename region id

	keep if inrange(yob, 1943, 1988)

	* Only the urban rows go into the final file.
	keep if rural == 0
	drop rural

	save Sexratio_UHS_1990.dta, replace

	clear

	
********************************************************************************************

* Sex ratio measures used for Table 6 in the paper (years 1996-2006): Census 2000 residential sex ratio

********************************************************************************************


* Build the person-level extract SRxUHSbprov.dta from the 2000 census file,
* using the province encoded in id as the province variable bprov.
use census00.dta, clear

keep id r03 r041 r07 r08*
drop if id == .

* NOTE(review): the meaning of r07 code 2 is not documented in this file;
* confirm against the census codebook.
keep if r07 == 2
drop r07

rename r041 byear
keep if inrange(byear, 1939, 1992)

* Province code = id with its last 16 digits removed.
generate bprov = int(id / (10^16))

keep if inlist(bprov, 11, 21, 33, 44, 51, 61)

* male is 1 when r03 equals 1 and 0 otherwise.
generate male = (r03 == 1)
drop r03

save SRxUHSbprov, replace



* Count men and women by province for every (survey year, age) cell.
* A cell takes everyone whose birth year lies within two years of
* (year - age). Each cell is written to its own file: y<year>a<age>m holds the
* male count (men) and y<year>a<age>w holds the female count (wom).

* This load is superseded by the use-if-using calls inside the loop.
use SRxUHSbprov, clear

forvalues yr = 1988/2006 {
    forvalues ag = 18/47 {
        local first = `yr' - `ag' - 2
        local last = `yr' - `ag' + 2

        * men in the five-year birth window
        use if inrange(byear, `first', `last') & male == 1 using SRxUHSbprov, clear
        collapse (sum) men = male, by(bprov)
        generate year = `yr'
        generate age = `ag'
        sort year bprov age
        save y`yr'a`ag'm, replace

        * women in the same window
        use if inrange(byear, `first', `last') & male == 0 using SRxUHSbprov, clear
        collapse (count) wom = male, by(bprov)
        generate year = `yr'
        generate age = `ag'
        sort year bprov age
        save y`yr'a`ag'w, replace
    }
}


* Stack all male cell files into one table. y1988a18m is loaded as the seed
* and is appended again inside the loop, so that cell appears twice; the
* collapse below takes the mean of men within year x bprov x age, which
* leaves exactly one row per cell with the duplicated value unchanged.
use y1988a18m, clear

forvalues y = 1988(1)2006 {
    forvalues a = 18(1)47 {
        append using y`y'a`a'm
    }
}

collapse men, by(year bprov age)

sort year bprov age
save temp, replace

* Same stacking for the female cell files.
use y1988a18w, clear

forvalues y = 1988(1)2006 {
    forvalues a = 18(1)47 {
        append using y`y'a`a'w
    }
}

collapse wom, by(year bprov age)

sort year bprov age

* Both sides hold one row per year x bprov x age after the collapses, so the
* join is declared 1:1. This replaces the old-style merge syntax and makes
* Stata stop with an error if either side ever has duplicate keys.
merge 1:1 year bprov age using temp

tab _merge
drop _merge

* temp now holds both counts (wom and men) for every cell.
save temp, replace

* Turn the merged cell counts in memory (one row per year x bprov x age with
* men and wom) into the final lookup table Sexratio_UHS_2000_resident.dta.
*
* The session log is NOT closed at the end of this section: a later section
* of this do-file still runs and finishes with its own log close, which would
* fail with no log file open if the log were closed here, and that section
* would go unlogged.

* Rows for men: ratio of the cell centred on the man's own age.
ge male=1
ge ratiob=men/wom
rename bprov region
sort year region age male
save men, replace
clear

* Rows for women: age is shifted down by two, so a woman of age a is given
* the ratio of the cell centred on age a+2.
use temp, clear
ge male=0
replace age=age-2
ge ratiob=men/wom
rename bprov region
sort year region age male
save wom, replace
clear


use men.dta, clear
append using wom.dta

keep if age<46 & age>17

keep region year male age ratiob

rename ratiob ratio

save Sexratio_UHS_2000_resident.dta, replace

clear


********************************************************************************************

* Sex ratio measures used for Table V in Appendix B: Census 2000 birth sex ratio

********************************************************************************************

* Build the person-level extract SRxUHSbprov.dta from the 2000 census file,
* this time keyed on birth province. This overwrites the extract written by
* the resident section.
use census00.dta, clear

keep id r03 r041 r07 r08*
drop if id == .

* NOTE(review): the meaning of r07 code 2 is not documented in this file;
* confirm against the census codebook.
keep if r07 == 2
drop r07

rename r041 byear
keep if inrange(byear, 1939, 1992)

* generate birth province
* Start from the province encoded in id (id with its last 16 digits removed)
* and overwrite it with r082 wherever r081 equals 3.
* NOTE(review): presumably r081 code 3 marks people born in another province
* and r082 holds that province - confirm against the census codebook.
generate bprov = int(id / (10^16))
replace bprov = r082 if r081 == 3

keep if inlist(bprov, 11, 21, 33, 44, 51, 61)

* male is 1 when r03 equals 1 and 0 otherwise.
generate male = (r03 == 1)
drop r03

save SRxUHSbprov, replace



* Count men and women by birth province for every (survey year, age) cell.
* A cell takes everyone whose birth year lies within two years of
* (year - age). Each cell is written to its own file: y<year>a<age>m holds the
* male count (men) and y<year>a<age>w holds the female count (wom).

* This load is superseded by the use-if-using calls inside the loop.
use SRxUHSbprov, clear

forvalues yr = 1988/2006 {
    forvalues ag = 18/47 {
        local first = `yr' - `ag' - 2
        local last = `yr' - `ag' + 2

        * men in the five-year birth window
        use if inrange(byear, `first', `last') & male == 1 using SRxUHSbprov, clear
        collapse (sum) men = male, by(bprov)
        generate year = `yr'
        generate age = `ag'
        sort year bprov age
        save y`yr'a`ag'm, replace

        * women in the same window
        use if inrange(byear, `first', `last') & male == 0 using SRxUHSbprov, clear
        collapse (count) wom = male, by(bprov)
        generate year = `yr'
        generate age = `ag'
        sort year bprov age
        save y`yr'a`ag'w, replace
    }
}


* Stack all male cell files into one table. y1988a18m is loaded as the seed
* and is appended again inside the loop, so that cell appears twice; the
* collapse below takes the mean of men within year x bprov x age, which
* leaves exactly one row per cell with the duplicated value unchanged.
use y1988a18m, clear

forvalues y = 1988(1)2006 {
    forvalues a = 18(1)47 {
        append using y`y'a`a'm
    }
}

collapse men, by(year bprov age)

sort year bprov age
save temp, replace

* Same stacking for the female cell files.
use y1988a18w, clear

forvalues y = 1988(1)2006 {
    forvalues a = 18(1)47 {
        append using y`y'a`a'w
    }
}

collapse wom, by(year bprov age)

sort year bprov age

* Both sides hold one row per year x bprov x age after the collapses, so the
* join is declared 1:1. This replaces the old-style merge syntax and makes
* Stata stop with an error if either side ever has duplicate keys.
merge 1:1 year bprov age using temp

tab _merge
drop _merge

* temp now holds both counts (wom and men) for every cell.
save temp, replace

* Turn the merged cell counts in memory (one row per year x bprov x age with
* men and wom) into the final lookup table Sexratio_UHS_2000_Birth.dta.

* The ratio and the region name are the same for both sexes, so build them once.
generate male = 1
generate ratiob = men / wom
rename bprov region

* Rows for men: ratio of the cell centred on the man's own age.
sort year region age male
save men, replace

* Rows for women: same cells, with age shifted down by two so that a woman
* of age a is given the ratio of the cell centred on age a+2.
replace male = 0
replace age = age - 2
sort year region age male
save wom, replace
clear

* Stack men first, then women, and keep ages 18 to 45.
use men.dta, clear
append using wom.dta

keep if inrange(age, 18, 45)

keep region year male age ratiob

save Sexratio_UHS_2000_Birth.dta, replace

clear

log close
